	

	******************************************************************
	**
	**
	**		DETAILS: 	This file takes raw historical survey
	**					data and creates a data set of 
	**					vote intention by education group
	**					by year, as well as proportions
	**					of the population in each group.
	**
	**		NOTE:		I do not have permission to re-post
	**					the full original surveys, so the
	**					replication file includes only the
	**					variables required for my analysis.
	**					The full original files are all
	**					publicly available: through the
	**					Roper Center for 1973–1988, 
	**					from Noam Lupu for 1993–2006,
	**					and from LAPOP for 2012.
	**
	**
	******************************************************************
		





*-------------------------------------------------------------------------------
* 1973 Survey
*-------------------------------------------------------------------------------


* Run the 1973 survey analysis once per definition of the top education group
*------------------------------------------------------------------------------
	/* Baseline (main text): top group = secondary complete + tertiary complete.
	   Tertiary (appendix):  top group = tertiary complete only.
	   Each pass stores one parmest results file per election year in the
	   tempfile macro dif<year>_<edgroup>, which is appended further below. */

foreach edgroup in Baseline Tertiary {

	* Reload the untouched input on every pass, because educ is rebuilt below
	use "data/inputs/Survey1973.dta", clear

	* AD vote indicators, one per election asked about in this survey:
	* 1 if the vote code is 1, 0 if it falls in the listed range of other
	* codes, and missing for any other value (including missing)
	gen voteAD_1958 = cond(vote1958 == 1, 1, cond(inlist(vote1958, 2, 3), 0, .))
	gen voteAD_1963 = cond(vote1963 == 1, 1, cond(inrange(vote1963, 2, 6), 0, .))
	gen voteAD_1968 = cond(vote1968 == 1, 1, cond(inrange(vote1968, 2, 6), 0, .))
	gen voteAD_1973 = cond(vote1973 == 1, 1, cond(inrange(vote1973, 2, 14), 0, .))

	* Education groups, built from the original education codes
	gen educ = "No primary" if inlist(educ_original, 0, 1)
	replace educ = "Primary complete" if inlist(educ_original, 2, 3)
	replace educ = "Secondary complete" if inrange(educ_original, 4, 7)
	replace educ = "Tertiary complete" if educ_original == 8

	* `top' is the index of the highest education dummy created by -tab, gen()-
	* (dummies are numbered in alphabetical order of the group labels)
	if "`edgroup'" == "Baseline" {
		/* This survey has <1% tertiary complete people, so the top two
		   groups are pooled, leaving three groups */
		replace educ = "Secondary + tertiary" if regexm(educ, "Secondary") | regexm(educ, "Tertiary")
		local top = 3
	}
	else {
		local top = 4
	}

	tab educ, gen(educ)	/* Group dummies; n.b. there are no missings */

	* For each election year: group means of the AD vote (no-constant
	* regression on the full set of dummies, robust SEs), then the
	* top-minus-bottom gap in percentage points, saved via parmest
	foreach year of numlist 1958(5)1973 {

		reg voteAD_`year' educ1-educ`top' if educ ~= "", nocons r
		lincomest 100*(_b[educ`top'] - _b[educ1])

		tempfile dif`year'_`edgroup'
		parmest, saving(`dif`year'_`edgroup'') idstr("dif`year'_`edgroup'") level(95)

	} /* years */
} /* edgroups */
	


*-------------------------------------------------------------------------------
* 1983 survey
*-------------------------------------------------------------------------------


* Load the 1983 survey
*---------------------

use "data/inputs/Survey1983.dta", clear


* Education groups (four categories; unclassified respondents are dropped)
*-------------------------------------------------------------------------

gen educ = "No primary" if inlist(educ_original, 1, 2)
replace educ = "Primary complete" if inlist(educ_original, 3, 4)
replace educ = "Secondary complete" if inlist(educ_original, 5, 6)
replace educ = "Tertiary complete" if educ_original == 7
drop if missing(educ) /* Dropping missings */
tab educ, gen(educ)	/* educ1-educ4, in alphabetical order of the labels */


* AD vote indicators
*-------------------
	/* 1 if the vote code is 1, 0 if it lies in the listed range of other
	   codes, missing otherwise */

gen voteAD_1978 = cond(vote1978 == 1, 1, cond(inrange(vote1978, 2, 6), 0, .))
gen voteAD_1983 = cond(vote1983 == 1, 1, cond(inrange(vote1983, 2, 7), 0, .))


* Education gap in AD vote, per election year
*--------------------------------------------
	/* Group means via a no-constant regression on all four dummies, then the
	   tertiary-minus-no-primary difference in percentage points. Results go
	   to the tempfile macro dif<year>, which is appended further below. */

foreach year in 1978 1983 {

	reg voteAD_`year' educ1-educ4 if educ ~= "", nocons r
	lincomest 100*(_b[educ4] - _b[educ1])

	tempfile dif`year'
	parmest, saving(`dif`year'') idstr("dif`year'") level(95)

}


*-------------------------------------------------------------------------------
* 1988
*-------------------------------------------------------------------------------

	
* original data
*--------------
	
use "data/inputs/Survey1988.dta", clear

		
* Vote AD
*--------	
	/* 1 if the vote code is 1, 0 for any other non-missing code.
	   !missing() is used instead of "~= ." so that extended missing values
	   (.a-.z), if present, are not silently counted as non-AD votes. */

gen voteAD_1988 = .
replace voteAD_1988 = 1 if vote1988 == 1
replace voteAD_1988 = 0 if vote1988 ~= 1 & !missing(vote1988)
		 

* Education groups
*-----------------

gen educ = "No primary" if educ_original == 1	 | educ_original == 2
replace educ = "Primary complete" if educ_original == 3 | educ_original == 4
replace educ = "Secondary complete" if educ_original == 5 | educ_original == 6
replace educ = "Tertiary complete" if educ_original == 7
tab educ, gen(educ)	/* N.b. no missings */
	
	
* years
*------
	
local year = 1988	


	* capture relationship between education and vote share
	*------------------------------------------------------		
	/* The dependent variable is spelled out in full (voteAD_1988) rather
	   than relying on Stata's variable-name abbreviation of "voteAD", which
	   fails under -set varabbrev off- or if another voteAD* variable exists.
	   Results go to the tempfile macro dif1988, appended further below. */
		
	reg voteAD_`year' educ1-educ4, nocons r
	lincomest 100*(_b[educ4] - _b[educ1])	
		tempfile dif`year'
		parmest, saving(`dif`year'') idstr("dif`year'") level(95)
	


*-------------------------------------------------------------------------------
* 1993–2006
*-------------------------------------------------------------------------------
	
	
* Load the pooled 1993-2006 file (Lupu replication data)
*-------------------------------------------------------

use "data/inputs/Survey1993_2006_Lupu.dta", clear


* AD vote indicator for 1993
*---------------------------
	/* 1 if the vote code is 4, 0 if it is between 1 and 3, missing otherwise */

gen voteAD_1993 = cond(vote1993 == 4, 1, cond(inrange(vote1993, 1, 3), 0, .))


* Education dummies
*------------------
	/* educ already exists in this file; missing values are excluded in the
	   regressions below */

tab educ, gen(educ)


* Education gap by survey year, under two weighting options
*----------------------------------------------------------
	/* i = 1: analytic weights from the variable weight
	   i = 2: unweighted
	   The outcome is voteAD_1993 for 1993 and chavez for the later years.
	   Results go to the tempfile macro dif<year>_<i>, appended further below. */

local wt1 "[aw=weight]"
local wt2 ""

forvalues i = 1/2 {

	foreach year in 1993 1998 2000 2006 {

		if `year' == 1993 {
			local outcome voteAD_`year'
		}
		else {
			local outcome chavez
		}

		* group means (no-constant regression), then top-minus-bottom
		* difference in percentage points
		reg `outcome' educ1-educ4 if educ ~= . & year == `year' `wt`i'', nocons r
		lincomest 100*(_b[educ4] - _b[educ1])

		tempfile dif`year'_`i'
		parmest, saving(`dif`year'_`i'') idstr("dif`year'_`i'") level(95)

	}

}
	


	
			**	**	**	**	**	**	**	**	**	**	**	**	**
			**	**	**	**	**	**	**	**	**	**	**	**	**
			**	**	**	**	**	**	**	**	**	**	**	**	**
	


*-------------------------------------------------------------------------------
* 2012
*-------------------------------------------------------------------------------


* raw LAPOP data
*---------------

use "data/inputs/Survey2012_LAPOP.dta", clear


* vote intention
*---------------
	/* 1 if the vote code is 2, 0 if it is 3, missing for every other value */

gen chavez = .
replace chavez = 1 if vote2012 == 2
replace chavez = 0 if vote2012 == 3	


* Education groups
*-----------------
	/* Cut points are applied to educ_original (presumably years of
	   schooling; confirm against the LAPOP codebook).
	   Stata treats every missing value (., .a-.z) as larger than any number,
	   so the open-ended top group must exclude ALL missing codes, not only
	   .a and .b; otherwise respondents with . or .c-.z would be classified
	   as "Tertiary complete". */

gen educ = "No primary" if educ_original < 6
replace educ = "Primary complete" if educ_original >= 6 & educ_original < 11
replace educ = "Secondary complete" if educ_original >= 11 & educ_original < 15
replace educ = "Tertiary complete" if educ_original >= 15 & !missing(educ_original)
tab educ, gen(educ)


* year
*-----
	
local year = 2012	
	
	
	* capture relationship between education and vote share
	*------------------------------------------------------		
	/* Group means via a no-constant regression on all four dummies, then the
	   top-minus-bottom difference in percentage points. Results go to the
	   tempfile macro dif2012, appended further below. */

	reg chavez educ1-educ4 if educ ~= "", nocons r
	lincomest 100*(_b[educ4] - _b[educ1])	
		tempfile dif`year'
		parmest, saving(`dif`year'') idstr("dif`year'") level(95)
	


	
			**	**	**	**	**	**	**	**	**	**	**	**	**
			**	**	**	**	**	**	**	**	**	**	**	**	**
			**	**	**	**	**	**	**	**	**	**	**	**	**
	


*-------------------------------------------------------------------------------
* append results and save
*-------------------------------------------------------------------------------


* Stack all saved difference estimates into one data set
*-------------------------------------------------------
	/* Each tempfile holds one parmest result, labelled by idstr:
	     dif<year>_Baseline / dif<year>_Tertiary   (1958-1973)
	     dif<year>                                 (1978-1988, 2012)
	     dif<year>_1 (weighted) / dif<year>_2 (unweighted)   (1993-2006) */

clear

foreach edgroup in Baseline Tertiary {
	foreach year of numlist 1958(5)1973 {
		di "dif`year'_`edgroup'"
		append using `dif`year'_`edgroup''
	}
}

foreach year in 1978 1983 1988 2012 {
	append using `dif`year''
}

foreach year in 1993 1998 2000 2006 {
	append using `dif`year'_1'
	di "dif`year'_2"
	append using `dif`year'_2'
}


* Appendix table: one row per version, one estimate column per year
*------------------------------------------------------------------
	/* Everything starts as "Baseline"; rows from the tertiary-only
	   specification and from the weighted runs are then relabelled.
	   idstr is reduced to the bare year so it can serve as j() in reshape. */

preserve
gen version = "Baseline"
replace version = "Tertiary" if regexm(idstr, "Tertiary")
replace version = "Weighted" if regexm(idstr, "_1")
keep idstr estimate version
replace idstr = subinstr(subinstr(idstr, "_1", "", .), "_2", "", .)
destring idstr, replace ignore("dif" "_Baseline" "_Tertiary")
reshape wide estimate, i(version) j(idstr)
save "data/outputs/EducAlternativesAppendix.dta", replace
restore


* Main graph: baseline / unweighted estimates only, keyed by numeric year
*------------------------------------------------------------------------

drop if regexm(idstr, "Tertiary") | regexm(idstr, "_1")
replace idstr = subinstr(idstr, "_2", "", .)
destring idstr, ignore("dif" "_Baseline") replace
rename idstr year
save "data/outputs/EducMstr.dta", replace


* End	
